library(spdep)
## Loading required package: sp
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
## Loading required package: sf
## Linking to GEOS 3.8.1, GDAL 3.1.1, PROJ 6.3.1
library(rgdal)
## rgdal: version: 1.5-16, (SVN revision 1050)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.1.1, released 2020/06/22
## Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/4.0/Resources/library/rgdal/gdal
## GDAL binary built with GEOS: TRUE
## Loaded PROJ runtime: Rel. 6.3.1, February 10th, 2020, [PJ_VERSION: 631]
## Path to PROJ shared files: /Library/Frameworks/R.framework/Versions/4.0/Resources/library/rgdal/proj
## Linking to sp version:1.4-2
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading rgdal.
library(rgeos)
## rgeos version: 0.5-5, (SVN revision 640)
## GEOS runtime version: 3.8.1-CAPI-1.13.3
## Linking to sp version: 1.4-2
## Polygon checking: TRUE
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.1.0 ✓ dplyr 1.0.5
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(splm)
library(tmap)
model_pm = read_csv("intermediate/model_ctyear.csv") %>%
rename(GEOID = County) %>%
arrange((as.numeric(GEOID))) %>%
dplyr::select(GEOID, annual_mean_all, State, year, county_type, popd_q, hhinc_q, Climate_Zone)
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## County = col_character(),
## annual_mean_all = col_double(),
## State = col_double(),
## year = col_double(),
## total_pop = col_character(),
## pop_density = col_double(),
## housing_density = col_double(),
## hh_income = col_double(),
## land_use = col_double(),
## EPA_region = col_double(),
## native_prop = col_double(),
## Climate_Zone = col_character(),
## county_type = col_double(),
## popd_q = col_character(),
## hhinc_q = col_character()
## )
# read in sf counties file from tigris library
library(tigris)
## To enable
## caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.
us_counties = counties()
##
|
| | 0%
|
| | 1%
|
|= | 1%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|== | 4%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|===== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|======= | 9%
|
|======= | 10%
|
|======= | 11%
|
|======== | 11%
|
|======== | 12%
|
|========= | 12%
|
|========= | 13%
|
|========= | 14%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 15%
|
|=========== | 16%
|
|============ | 16%
|
|============ | 17%
|
|============ | 18%
|
|============= | 18%
|
|============= | 19%
|
|============== | 19%
|
|============== | 20%
|
|============== | 21%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 22%
|
|================ | 23%
|
|================ | 24%
|
|================= | 24%
|
|================= | 25%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 26%
|
|=================== | 27%
|
|=================== | 28%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 30%
|
|===================== | 31%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 32%
|
|======================= | 33%
|
|======================= | 34%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 35%
|
|========================= | 36%
|
|========================== | 36%
|
|========================== | 37%
|
|========================== | 38%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 39%
|
|============================ | 40%
|
|============================ | 41%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 42%
|
|============================== | 43%
|
|============================== | 44%
|
|=============================== | 44%
|
|=============================== | 45%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 46%
|
|================================= | 47%
|
|================================= | 48%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 49%
|
|=================================== | 50%
|
|=================================== | 51%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 52%
|
|===================================== | 53%
|
|===================================== | 54%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 55%
|
|======================================= | 56%
|
|======================================== | 56%
|
|======================================== | 57%
|
|======================================== | 58%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 59%
|
|========================================== | 60%
|
|========================================== | 61%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 62%
|
|============================================ | 63%
|
|============================================ | 64%
|
|============================================= | 64%
|
|============================================= | 65%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 66%
|
|=============================================== | 67%
|
|=============================================== | 68%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 69%
|
|================================================= | 70%
|
|================================================= | 71%
|
|================================================== | 71%
|
|================================================== | 72%
|
|=================================================== | 72%
|
|=================================================== | 73%
|
|=================================================== | 74%
|
|==================================================== | 74%
|
|==================================================== | 75%
|
|===================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 76%
|
|====================================================== | 77%
|
|====================================================== | 78%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 79%
|
|======================================================== | 80%
|
|======================================================== | 81%
|
|========================================================= | 81%
|
|========================================================= | 82%
|
|========================================================== | 82%
|
|========================================================== | 83%
|
|========================================================== | 84%
|
|=========================================================== | 84%
|
|=========================================================== | 85%
|
|============================================================ | 85%
|
|============================================================ | 86%
|
|============================================================= | 86%
|
|============================================================= | 87%
|
|============================================================= | 88%
|
|============================================================== | 88%
|
|============================================================== | 89%
|
|=============================================================== | 89%
|
|=============================================================== | 90%
|
|=============================================================== | 91%
|
|================================================================ | 91%
|
|================================================================ | 92%
|
|================================================================= | 92%
|
|================================================================= | 93%
|
|================================================================= | 94%
|
|================================================================== | 94%
|
|================================================================== | 95%
|
|=================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 96%
|
|==================================================================== | 97%
|
|==================================================================== | 98%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 99%
|
|======================================================================| 100%
st_geometry(us_counties) = NULL
us_counties = us_counties %>% dplyr::select(GEOID) %>%
arrange((as.numeric(GEOID)))
# select only 2000 data
model_pm2000 = model_pm %>% filter(year == 2000)
# find non-matched counties to drop from counties sf (HI, AK, territories)
drop_counties = anti_join(us_counties, model_pm2000, by="GEOID") %>% pull(GEOID)
# drop those counties from counties sf, only keep study counties (N = 3107)
us_counties_cleaned = counties() %>% filter(!GEOID %in% drop_counties)
# convert sf to spdf to join with PM df
us_counties_cleaned = as_Spatial(us_counties_cleaned)
# join PM data with spatial dataset to create spatial df for 2000
model_pm2000_sp <- merge(us_counties_cleaned, model_pm2000)
# create spatial weights matrix
w_v2 <- poly2nb(model_pm2000_sp, queen = TRUE)
summary(w_v2)
## Neighbour list object:
## Number of regions: 3107
## Number of nonzero links: 18442
## Percentage nonzero weights: 0.1910405
## Average number of links: 5.935629
## Link number distribution:
##
## 1 2 3 4 5 6 7 8 9 10 11 13 14
## 13 24 64 263 670 1102 679 228 50 11 1 1 1
## 13 least connected regions:
## 294 309 367 391 680 1074 1140 1152 1498 1778 2131 2136 2351 with 1 link
## 1 most connected region:
## 2697 with 14 links
# check on bordering counties of given county
w_v2[[730]]
## [1] 516 877 925 2510 2768 2992
model_pm2000_sp$GEOID[730] # random county of interest: champaign county, OH
## [1] "39021"
model_pm2000_sp$GEOID[c(516, 877, 925, 2510, 2768,2992)] # check to see if they are actually bordering counties; they are
## [1] "39091" "39023" "39109" "39097" "39159" "39149"
# convert to matrix and then listw format
# zero.policy = T to include island counties (offshore, but still a part of contiguous US state)
wm <- nb2mat(w_v2, style='B', zero.policy = TRUE)
rwm <- mat2listw(wm, style='W')
I used this resource: https://spatialanalysis.github.io/lab_tutorials/Spatial_Weights_as_Distance_Functions.html#creating-inverse-distance-functions-for-distance-bands
# create inverse distance functions for distance bands
# get coordinates of counties
coordinates_pm = coordinates(model_pm2000_sp)
# get k = 1 nearest neighbors, we want each county to have at least one nearest neighbor and need to extract the max distance between neighbors of all counties
k1 <- knn2nb(knearneigh(coordinates_pm))
# get the max distance between county and closest neighbor to set upper bound
critical.threshold <- max(unlist(nbdists(k1,coordinates_pm)))
# use dnearneigh to calculate distance band
nb.dist.band <- dnearneigh(coordinates_pm, 0, critical.threshold) # args are coordinates, lower bound = 0, upper bound = critical thres
# use nbdists to get distances in similar structure as input neighbors list above
distances <- nbdists(nb.dist.band, coordinates_pm)
distances[1]
## [[1]]
## [1] 1.1366056 0.5244306 0.9981864 1.4365179 1.4260065 1.1964169 1.0687246
## [8] 0.4426206 1.4626447 0.8867323 0.8851562 0.8108661 1.0157625 1.2500343
## [15] 0.3430096 1.2969074 0.8133787 1.3055384 1.4568858 1.0496425 0.4633550
## [22] 1.4405003 0.7061476 0.5822625 0.4065149 1.0017623 1.3733901 1.3640964
## [29] 1.4230511 0.3638592 0.8806252 1.4106154 1.3209256 1.1017951 0.8384595
## [36] 0.6842150 0.8261490 0.9254465 1.3119185 1.3158906 1.3186576 0.4546033
## [43] 1.3580043 0.7714523
# apply 1/x function on distances list
invd1 <- lapply(distances, function(x) (1/x))
# NOTE: I think units are in decimal degrees (given coordinates are lat long)
# check length to make sure it's the same as neighbors list
length(invd1)
## [1] 3107
invd1[1] # check first element
## [[1]]
## [1] 0.8798127 1.9068301 1.0018169 0.6961278 0.7012591 0.8358291 0.9356947
## [8] 2.2592711 0.6836930 1.1277361 1.1297441 1.2332492 0.9844821 0.7999781
## [15] 2.9153704 0.7710651 1.2294396 0.7659675 0.6863956 0.9527053 2.1581723
## [22] 0.6942033 1.4161345 1.7174385 2.4599344 0.9982408 0.7281253 0.7330860
## [29] 0.7027155 2.7483155 1.1355569 0.7089104 0.7570449 0.9076098 1.1926635
## [36] 1.4615290 1.2104355 1.0805595 0.7622425 0.7599416 0.7583470 2.1997199
## [43] 0.7363747 1.2962563
# convert distance band to listw weight
invd.weights <- nb2listw(nb.dist.band, glist = invd1,style = "B")
summary(invd.weights)
## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 3107
## Number of nonzero links: 115546
## Percentage nonzero weights: 1.19694
## Average number of links: 37.18893
## Link number distribution:
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
## 9 12 19 30 29 39 32 45 50 37 38 50 37 50 49 38 36 43 40 38 28 35 39 36 46 43
## 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52
## 53 56 83 66 61 66 56 60 63 66 75 53 70 72 70 63 59 58 57 42 36 36 28 31 22 20
## 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
## 30 25 29 34 32 42 32 35 43 41 30 29 30 29 28 19 19 17 20 16 15 13 19 22 19 17
## 79 80 81 82 83 84 85
## 12 12 11 6 8 1 2
## 9 least connected regions:
## 118 387 502 708 971 1069 1584 2126 2290 with 1 link
## 2 most connected regions:
## 234 2300 with 85 links
##
## Weights style: B
## Weights constants summary:
## n nn S0 S1 S2
## B 3107 9653449 139151.6 520224.1 33328461
# Check value of weights
invd.weights$weights[1]
## [[1]]
## [1] 0.8798127 1.9068301 1.0018169 0.6961278 0.7012591 0.8358291 0.9356947
## [8] 2.2592711 0.6836930 1.1277361 1.1297441 1.2332492 0.9844821 0.7999781
## [15] 2.9153704 0.7710651 1.2294396 0.7659675 0.6863956 0.9527053 2.1581723
## [22] 0.6942033 1.4161345 1.7174385 2.4599344 0.9982408 0.7281253 0.7330860
## [29] 0.7027155 2.7483155 1.1355569 0.7089104 0.7570449 0.9076098 1.1926635
## [36] 1.4615290 1.2104355 1.0805595 0.7622425 0.7599416 0.7583470 2.1997199
## [43] 0.7363747 1.2962563
fit_1 <- lm(annual_mean_all ~ county_type + popd_q + hhinc_q,
data=model_pm2000_sp)
summary(fit_1)
##
## Call:
## lm(formula = annual_mean_all ~ county_type + popd_q + hhinc_q,
## data = model_pm2000_sp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.7804 -1.3900 0.1023 1.6118 8.4132
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.8874 0.1851 58.815 < 2e-16 ***
## county_type -1.7839 0.1756 -10.159 < 2e-16 ***
## popd_q(160,382] 3.4009 0.1959 17.356 < 2e-16 ***
## popd_q(22.5,32.7] 0.9573 0.1880 5.093 3.73e-07 ***
## popd_q(32.7,45.6] 1.6065 0.1882 8.537 < 2e-16 ***
## popd_q(382,6.95e+04] 3.8717 0.2029 19.078 < 2e-16 ***
## popd_q(4.4,12.8] -1.9318 0.1888 -10.234 < 2e-16 ***
## popd_q(45.6,63] 1.9107 0.1885 10.138 < 2e-16 ***
## popd_q(63,91.8] 2.5152 0.1895 13.272 < 2e-16 ***
## popd_q(91.8,160] 2.9942 0.1914 15.643 < 2e-16 ***
## popd_q[0.1,4.4] -2.9990 0.1882 -15.938 < 2e-16 ***
## hhinc_q(3.57e+04,3.8e+04] -0.7671 0.1876 -4.090 4.42e-05 ***
## hhinc_q(3.8e+04,4.03e+04] -1.1278 0.1879 -6.001 2.18e-09 ***
## hhinc_q(4.03e+04,4.24e+04] -1.2225 0.1877 -6.513 8.55e-11 ***
## hhinc_q(4.24e+04,4.45e+04] -1.8807 0.1883 -9.986 < 2e-16 ***
## hhinc_q(4.45e+04,4.72e+04] -2.0872 0.1885 -11.070 < 2e-16 ***
## hhinc_q(4.72e+04,5.09e+04] -2.0190 0.1895 -10.653 < 2e-16 ***
## hhinc_q(5.09e+04,5.75e+04] -2.3594 0.1923 -12.271 < 2e-16 ***
## hhinc_q(5.75e+04,1.16e+05] -2.7080 0.2029 -13.348 < 2e-16 ***
## hhinc_q[1.94e+04,3.21e+04] 1.0107 0.1882 5.371 8.42e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.333 on 3087 degrees of freedom
## Multiple R-squared: 0.491, Adjusted R-squared: 0.4879
## F-statistic: 156.8 on 19 and 3087 DF, p-value: < 2.2e-16
# check global moran's I test of residuals = 0.7 (much closer to 1 than 0, evidence of spatial autocorr)
lm.morantest(fit_1, rwm, alternative="two.sided", zero.policy = TRUE)
##
## Global Moran I for regression residuals
##
## data:
## model: lm(formula = annual_mean_all ~ county_type + popd_q + hhinc_q,
## data = model_pm2000_sp)
## weights: rwm
##
## Moran I statistic standard deviate = 67.344, p-value < 2.2e-16
## alternative hypothesis: two.sided
## sample estimates:
## Observed Moran I Expectation Variance
## 0.7100028737 -0.0013188461 0.0001115674
## moran's i scatterplot
moran <- moran.plot(x = model_pm2000_sp$annual_mean_all, listw = rwm)
## highly positive association; strong evidence for spatially autocorrelated data
# calculate local moran's I test of residuals
local <- localmoran(x = model_pm2000_sp$annual_mean_all, listw = rwm)
# binds results to our polygon shapefile
moran.map <- cbind(model_pm2000_sp, local)
tm_shape(moran.map) +
tm_fill(col = "Ii", # local moran's I statistic values
style = "quantile",
title = "local moran statistic")
## Warning in sp::proj4string(obj): CRS object has comment, which is lost in output
## Variable(s) "Ii" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
# scale PM2.5
model_pm2000_sp$s_pm25 = scale(model_pm2000_sp$annual_mean_all) %>% as.vector()
#create a spatial lag variable for scaled pm2.5 and save it to a new column
model_pm2000_sp$lag_s_pm25 <- lag.listw(rwm, model_pm2000_sp$s_pm25) # use 'rwm' listw weights object specified above to create lagged vector
summary(model_pm2000_sp$s_pm25)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.20844 -0.87547 0.07726 0.00000 0.87615 2.20754
summary(model_pm2000_sp$lag_s_pm25)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.1003824 -0.8589864 0.0746933 -0.0007383 0.8714046 2.0687110
x <- model_pm2000_sp$s_pm25
y <- model_pm2000_sp$lag_s_pm25
xx <- tibble(x,y)
moran.plot(x, rwm) # same as scatterplot above (just rescaled)
# add column to show LISA quadrants
model_pm2000_sf <- st_as_sf(model_pm2000_sp) %>%
mutate(quad_sig = ifelse(model_pm2000_sp$s_pm25 > 0 &
model_pm2000_sp$lag_s_pm25 > 0 &
local[,5] <= 0.05,
"high-high",
ifelse(model_pm2000_sp$s_pm25 <= 0 &
model_pm2000_sp$lag_s_pm25 <= 0 &
local[,5] <= 0.05,
"low-low",
ifelse(model_pm2000_sp$s_pm25 > 0 &
model_pm2000_sp$lag_s_pm25 <= 0 &
local[,5] <= 0.05,
"high-low",
ifelse(model_pm2000_sp$s_pm25 <= 0 &
model_pm2000_sp$lag_s_pm25 > 0 &
local[,5] <= 0.05,
"low-high",
"non-significant")))))
table(model_pm2000_sf$quad_sig)
##
## high-high low-low non-significant
## 828 803 1476
# check that all h-h l-l from table() sum up to all significant counties
nrow(local[local[,5] <= 0.05,])
## [1] 1631
qtm(model_pm2000_sf, fill="quad_sig", fill.title="LISA")
fit_1 <- lm(annual_mean_all ~ county_type + popd_q + hhinc_q,
data=model_pm2000_sp)
summary(fit_1)
##
## Call:
## lm(formula = annual_mean_all ~ county_type + popd_q + hhinc_q,
## data = model_pm2000_sp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.7804 -1.3900 0.1023 1.6118 8.4132
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.8874 0.1851 58.815 < 2e-16 ***
## county_type -1.7839 0.1756 -10.159 < 2e-16 ***
## popd_q(160,382] 3.4009 0.1959 17.356 < 2e-16 ***
## popd_q(22.5,32.7] 0.9573 0.1880 5.093 3.73e-07 ***
## popd_q(32.7,45.6] 1.6065 0.1882 8.537 < 2e-16 ***
## popd_q(382,6.95e+04] 3.8717 0.2029 19.078 < 2e-16 ***
## popd_q(4.4,12.8] -1.9318 0.1888 -10.234 < 2e-16 ***
## popd_q(45.6,63] 1.9107 0.1885 10.138 < 2e-16 ***
## popd_q(63,91.8] 2.5152 0.1895 13.272 < 2e-16 ***
## popd_q(91.8,160] 2.9942 0.1914 15.643 < 2e-16 ***
## popd_q[0.1,4.4] -2.9990 0.1882 -15.938 < 2e-16 ***
## hhinc_q(3.57e+04,3.8e+04] -0.7671 0.1876 -4.090 4.42e-05 ***
## hhinc_q(3.8e+04,4.03e+04] -1.1278 0.1879 -6.001 2.18e-09 ***
## hhinc_q(4.03e+04,4.24e+04] -1.2225 0.1877 -6.513 8.55e-11 ***
## hhinc_q(4.24e+04,4.45e+04] -1.8807 0.1883 -9.986 < 2e-16 ***
## hhinc_q(4.45e+04,4.72e+04] -2.0872 0.1885 -11.070 < 2e-16 ***
## hhinc_q(4.72e+04,5.09e+04] -2.0190 0.1895 -10.653 < 2e-16 ***
## hhinc_q(5.09e+04,5.75e+04] -2.3594 0.1923 -12.271 < 2e-16 ***
## hhinc_q(5.75e+04,1.16e+05] -2.7080 0.2029 -13.348 < 2e-16 ***
## hhinc_q[1.94e+04,3.21e+04] 1.0107 0.1882 5.371 8.42e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.333 on 3087 degrees of freedom
## Multiple R-squared: 0.491, Adjusted R-squared: 0.4879
## F-statistic: 156.8 on 19 and 3087 DF, p-value: < 2.2e-16
# check global moran's I test of residuals = 0.7 (much closer to 1 than 0, evidence of spatial autocorr)
lm.morantest(fit_1, invd.weights, alternative="two.sided", zero.policy = TRUE)
##
## Global Moran I for regression residuals
##
## data:
## model: lm(formula = annual_mean_all ~ county_type + popd_q + hhinc_q,
## data = model_pm2000_sp)
## weights: invd.weights
##
## Moran I statistic standard deviate = 120.88, p-value < 2.2e-16
## alternative hypothesis: two.sided
## sample estimates:
## Observed Moran I Expectation Variance
## 6.134375e-01 -1.009156e-03 2.583624e-05
## moran's i scatterplot
moran_invd <- moran.plot(x = model_pm2000_sp$annual_mean_all, listw = invd.weights)
## highly positive association; strong evidence for spatially autocorrelated data
# calculate local moran's I test of residuals
local_invd <- localmoran(x = model_pm2000_sp$annual_mean_all, listw = invd.weights)
# binds results to our polygon shapefile
moran.map_invd <- cbind(model_pm2000_sp, local_invd)
tm_shape(moran.map_invd) +
tm_fill(col = "Ii", # local moran's I statistic values
style = "quantile",
title = "local moran statistic")
## Warning in sp::proj4string(obj): CRS object has comment, which is lost in output
## Variable(s) "Ii" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
# scale PM2.5
model_pm2000_sp$s_pm25_invd = scale(model_pm2000_sp$annual_mean_all) %>% as.vector()
#create a spatial lag variable for scaled pm2.5 and save it to a new column
model_pm2000_sp$lag_s_pm25_invd <- lag.listw(invd.weights, model_pm2000_sp$s_pm25) # use 'invd.weights' listw weights object specified above to create lagged vector
summary(model_pm2000_sp$s_pm25_invd)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.20844 -0.87547 0.07726 0.00000 0.87615 2.20754
summary(model_pm2000_sp$lag_s_pm25_invd)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -46.738 -14.536 2.584 20.035 52.792 176.689
x <- model_pm2000_sp$s_pm25_invd
y <- model_pm2000_sp$lag_s_pm25_invd
xx <- tibble(x,y)
moran.plot(x, invd.weights) # same as scatterplot above (just rescaled)
# add column to show LISA quadrants
model_pm2000_sf <- st_as_sf(model_pm2000_sp) %>%
mutate(quad_sig_invd = ifelse(model_pm2000_sp$s_pm25_invd > 0 &
model_pm2000_sp$lag_s_pm25_invd > 0 &
local[,5] <= 0.05,
"high-high",
ifelse(model_pm2000_sp$s_pm25_invd <= 0 &
model_pm2000_sp$lag_s_pm25_invd <= 0 &
local[,5] <= 0.05,
"low-low",
ifelse(model_pm2000_sp$s_pm25_invd > 0 &
model_pm2000_sp$lag_s_pm25_invd <= 0 &
local[,5] <= 0.05,
"high-low",
ifelse(model_pm2000_sp$s_pm25_invd <= 0 &
model_pm2000_sp$lag_s_pm25_invd > 0 &
local[,5] <= 0.05,
"low-high",
"non-significant")))))
table(model_pm2000_sf$quad_sig_invd)
##
## high-high low-low non-significant
## 828 803 1476
# check that all h-h l-l from table() sum up to all significant counties
nrow(local[local[,5] <= 0.05,])
## [1] 1631
qtm(model_pm2000_sf, fill="quad_sig_invd", fill.title="LISA")